#############################################################################################
## Simulation by calibrating from 1999 estimates -- III. Merge: abind the results and plot ##
## Model refers to "Distribution Regression with Selection"
## Authors: Chernozhukov, V., I. Fernandez-Val and S. Luo
## Last Update: 1/18/2018





rm(list=ls());

library(abind);


#####################
# Extract Arguments #
#####################
#Define default values for variables
n.boot         <- 200;
counter.method <- 2; 
# 1 for counterfactual decades within gender; 
# 2 for counterfactual sex within time period; 
# 3 for counterfactual 18 years within gender;
sample.id      <- 113104;


str.sample <- ifelse(counter.method==1, ifelse(sample.id==1, "_m","_f"), 
                     ifelse(counter.method==2, paste("_",sample.id,sep=""), ifelse(sample.id==1, "_mhalf","_fhalf")));


################################################
# Source the file which contains the functions #
################################################
load(paste("Est_female",str.sample,".RData",sep=""));
source("DRS_Source.R");

#################################
## Load the simulation results ##
#################################
n.sim <- 500;

sim.result <- readRDS(paste(getwd(),"/Simulations/Sim_f",str.sample,"1.rds",sep=""));
for (i in 2:n.sim){
  sim.result <- abind(sim.result, readRDS(paste(getwd(),"/Simulations/Sim_f",str.sample,i,".rds",sep="")), along=3);
}; # (4*n.thres+6)*(n.par+2)*n.sim;

q.lower<-0.1;
q.upper<-0.9;
q.cut <- c(((q.lower-l.thres)/by.thres+1):((q.upper-l.thres)/by.thres+1));

current.na.action <- options('na.action');
options(na.action='na.pass');
Xwc.all <- model.matrix(outcome.formula, mysample);
X.name  <- colnames(Xwc.all);
n.par <- length(X.name);

options(current.na.action);


##################################
## Construct Simulation Measure ##
##################################
beta.0.f <- cbind(ys-beta.Heckman.f[1], -t(replicate(n.thres,beta.Heckman.f[-1])))/sigma.Heckman.f;

sim.pi.f <- sim.result[1,,]; # (n.par+2)*n.sim;
sim.beta.f <- sim.result[2+c(1:n.thres),1:length(X.name),]; # n.thres*n.par*n.sim;
sim.rho.f <- tanh(sim.result[2+c(1:n.thres),length(X.name)+1,]); # n.thres*n.sim;

# Bias #
bias.pi.f <- rowMeans(sim.pi.f,na.rm=T) - pi.Heckman.f;
bias.beta.f <- rowMeans(sim.beta.f,na.rm=T,dims=2) - beta.0.f;
bias.rho.f <- rowMeans(sim.rho.f,na.rm=T) + rho.Heckman.f;

# SD #
sd.pi.f <- apply(sim.pi.f, 1, sd, na.rm=T);
sd.beta.f <- apply(sim.beta.f,c(1,2),sd,na.rm=T);
sd.rho.f <- apply(sim.rho.f, 1, sd, na.rm=T);

# RMSE #
rmse.pi.f <- sqrt(rowMeans((sim.pi.f - replicate(n.sim,pi.Heckman.f))^2,na.rm=T));
rmse.beta.f <- sqrt(rowMeans((sim.beta.f - replicate(n.sim,beta.0.f))^2,na.rm=T,dims=2));
rmse.rho.f <- sqrt(rowMeans((sim.rho.f + replicate(n.sim,rho.Heckman.f))^2,na.rm=T));



## For Confidence Bands ##
# SE #
se.pi.f <- sim.result[2,,]; # (n.par+2)*n.sim;
se.beta.f <- sqrt(sim.result[2+n.thres+c(1:n.thres),1:length(X.name),]); # n.thres*n.par*n.sim;
se.rho.f <- sqrt(sim.result[2+n.thres+c(1:n.thres),length(X.name)+1,])*(1-sim.rho.f^2); # n.thres*n.sim;

# SESD #
sesd.pi.f <- rowMeans(se.pi.f,na.rm=T)/sd.pi.f;
sesd.beta.f <- rowMeans(se.beta.f,na.rm=T,dims=2)/sd.beta.f;
sesd.rho.f <- rowMeans(se.rho.f,na.rm=T)/sd.rho.f;

# crt #
crt.sim.f <- sim.result[2+2*n.thres+1,,]; # (n.par+1)*n.sim;
crt.f <- rowMeans(crt.sim.f, na.rm=T); # (n.par+1);

alpha <- 0.05;
width.sim.pi.f <- 2*qnorm(1-alpha/2)*se.pi.f; # (n.par+2)*n.sim;
width.sim.beta.f <- 2*aperm(replicate(n.thres,crt.sim.f[1:length(X.name),]),c(3,1,2))*se.beta.f; # n.thres*n.par*n.sim;
width.sim.rho.f <- 2*t(replicate(n.thres,crt.sim.f[length(X.name)+1,]))*se.rho.f; # n.thres*n.sim;

width.pi.f <- rowMeans(width.sim.pi.f, na.rm=T); # n.par+2;
width.beta.f <- rowMeans(width.sim.beta.f, na.rm=T, dims=2); # n.thres*n.par;
width.rho.f <- rowMeans(width.sim.rho.f, na.rm=T); # n.thres;

ind.sim.pi.f <- sim.pi.f + width.sim.pi.f/2 >= replicate(n.sim, pi.Heckman.f)&
                sim.pi.f - width.sim.pi.f/2 <= replicate(n.sim, pi.Heckman.f); # (n.par+2)*n.sim;
ind.sim.beta.f <- sim.beta.f + width.sim.beta.f/2 >= replicate(n.sim, beta.0.f)&
                  sim.beta.f - width.sim.beta.f/2 <= replicate(n.sim, beta.0.f); # n.thres*n.par*n.sim;
ind.sim.rho.f <- sim.rho.f + width.sim.rho.f/2 >= -replicate(n.sim, rho.Heckman.f)&
                 sim.rho.f - width.sim.rho.f/2 <= -replicate(n.sim, rho.Heckman.f); # n.thres*n.sim;


coverage.pi.f <- rowMeans(ind.sim.pi.f,na.rm=T);
coverage.beta.f <- rowMeans(apply(ind.sim.beta.f[q.cut,,],c(2,3),min,na.rm=T),na.rm=T);
coverage.rho.f <- mean(apply(ind.sim.rho.f[q.cut,],2,min,na.rm=T),na.rm=T);


# Pointwise C.I. #
width.sim.beta.f.pw <- 2*qnorm(1-alpha/2)*se.beta.f; # n.thres*n.par*n.sim;
width.sim.rho.f.pw <- 2*qnorm(1-alpha/2)*se.rho.f; # n.thres*n.sim;

width.beta.f.pw <- rowMeans(width.sim.beta.f.pw, na.rm=T, dims=2); # n.thres*n.par;
width.rho.f.pw <- rowMeans(width.sim.rho.f.pw, na.rm=T); # n.thres;

ind.sim.beta.f.pw <- sim.beta.f + width.sim.beta.f.pw/2 >= replicate(n.sim, beta.0.f)&
                     sim.beta.f - width.sim.beta.f.pw/2 <= replicate(n.sim, beta.0.f); # n.thres*n.par*n.sim;
ind.sim.rho.f.pw <- sim.rho.f + width.sim.rho.f.pw/2 >= -replicate(n.sim, rho.Heckman.f)&
                    sim.rho.f - width.sim.rho.f.pw/2 <= -replicate(n.sim, rho.Heckman.f); # n.thres*n.sim;

coverage.beta.f.pw <- rowMeans(apply(ind.sim.beta.f.pw[q.cut,,],c(2,3),min,na.rm=T),na.rm=T);
coverage.rho.f.pw  <- mean(apply(ind.sim.rho.f.pw[q.cut,],2,min,na.rm=T),na.rm=T);



###############
# Print Table #
###############
print.para <- c("educ2122","couple");
table.f <- rbind(c(colMeans(width.beta.f)[print.para],mean(width.rho.f)),
                        c(crt.f[print.para],crt.f[length(X.name)+1]),
                        c(coverage.beta.f[print.para],coverage.rho.f)*100,
                        c(coverage.beta.f.pw[print.para],coverage.rho.f.pw)*100,
                        c(colMeans(sesd.beta.f)[print.para],mean(sesd.rho.f)));
colnames(table.f) <- c(print.para,"rho");
rownames(table.f) <- c("Average Length",
                        "Average Critical Value",
                        "Coverage uniform band(%)",
                        "Coverage pointwise band(%)",
                        "Average SE/SD");

save.image(paste("Sim_f",str.sample,".RData",sep=""));

library(xtable);
options(xtable.floating=FALSE);
options(xtable.timestamp="");

xtable(table.f);

###############
# Plot Graphs #
###############
if(counter.method == 1){
  main.str <- ifelse(sample.id==1, "Male","Female");
  main.str.counter1 <- paste(main.str," in 1980's", sep="");
  main.str.f <- paste(main.str," in 2000's", sep="");
  
  legend.counter1 <- "1980s";
  legend.f <- "2000s";
}else if(counter.method == 3){
  main.str <- ifelse(sample.id==1, "Male","Female");
  main.str.counter1 <- paste(main.str," in recent half: 1996-2013", sep="");
  main.str.f <- paste(main.str," in previous half: 1978-1995", sep="");
  
  legend.counter1 <- "recent half";
  legend.f <- "prev half";
}else{
  if (sample.id >= 1000){
    if(sample.id >= 100000){
      from.year <- sample.id %/% 1000;
      to.year   <- sample.id - from.year*1000;
    }else{
      from.year <- sample.id %/% 100;
      to.year   <- sample.id - from.year*100;
    };
    
    main.str <- paste(1900+to.year, " ~ ", 1900+from.year, sep="");
    main.str.counter1 <- paste("Men in ", 1900+to.year, " ~ ", 1900+from.year, sep="");
    main.str.f <- paste("Women in ", 1900+to.year, " ~ ", 1900+from.year, sep="");
  }else{
    sample.id.str <- 1900+sample.id;
    main.str <- sample.id.str;
    main.str.counter1 <- paste("Men in ", sample.id.str, sep="");
    main.str.f <- paste("Women in ", sample.id.str, sep="");
  };
  
  legend.counter1 <- "male";
  legend.f <- "female";
};

str.para <- c("HS","Further","SomeCol","Col","Grad","Married");
names(str.para) <- names(beta.Heckman.f[2:7]);

# Coefficient #

pdf(paste("Sim_f",str.sample,"_coef.pdf",sep=""));
para.name <- print.para;
if(sample.id == 113109){
  ylimlist <- rbind(c(-2,17),c(-10,120));
  ylimsesd <- c(0.9,1.4);
  ylimrho <- c(0,35);
}else{
  ylimlist <- rbind(c(-2,10),c(-30,210));
  ylimsesd <- c(0.9,1.4);
  ylimrho <- c(-5,30);
}

# Counterfactual Level 2 #
for(i in 1:length(para.name)){
  plot(seq(q.lower,q.upper,by.thres), bias.beta.f[q.cut,para.name[i]]/abs(beta.0.f[q.cut,para.name[i]])*100, lty=1, type="l",
       xlab="Quantile", ylab="% of True Parameter Value", ylim=ylimlist[i,],
       main=paste("Bias of ",str.para[para.name[i]],", ",main.str.f,sep=""));
  abline(h=0,lty=2);
  
  plot(seq(q.lower,q.upper,by.thres), sd.beta.f[q.cut,para.name[i]]/abs(beta.0.f[q.cut,para.name[i]])*100, lty=1, type="l",
       xlab="Quantile", ylab="% of True Parameter Value", ylim=ylimlist[i,],
       main=paste("SD of ",str.para[para.name[i]],", ",main.str.f,sep=""));
  abline(h=0,lty=2);
  
  plot(seq(q.lower,q.upper,by.thres), rmse.beta.f[q.cut,para.name[i]]/abs(beta.0.f[q.cut,para.name[i]])*100, lty=1, type="l",
       xlab="Quantile", ylab="% of True Parameter Value", ylim=ylimlist[i,],
       main=paste("RMSE of ",str.para[para.name[i]],", ",main.str.f,sep=""));
  abline(h=0,lty=2);
  
  
  plot(seq(q.lower,q.upper,by.thres),sesd.beta.f[q.cut,para.name[i]],lty=1,type="l",ylim=ylimsesd, xlab="Quantile",
       ylab="% of True Parameter Value",main=paste("SE/SD of ",str.para[para.name[i]],", ",main.str.f,sep=""));
  abline(h=1,lty=2); 
};

plot(seq(q.lower,q.upper,by.thres), bias.rho.f[q.cut]/abs(rho.Heckman.f)*100, lty=1, type="l", ylim=ylimrho,
     xlab="Quantile",ylab="% of True Parameter Value",main=paste("Bias of rho, ",main.str.f,sep=""));
abline(h=0,lty=2);

plot(seq(q.lower,q.upper,by.thres), sd.rho.f[q.cut]/abs(rho.Heckman.f)*100, lty=1, type="l", ylim=ylimrho,
     xlab="Quantile",ylab="% of True Parameter Value",main=paste("SD of rho, ",main.str.f,sep=""));
abline(h=0,lty=2);

plot(seq(q.lower,q.upper,by.thres), rmse.rho.f[q.cut]/abs(rho.Heckman.f)*100, lty=1, type="l", ylim=ylimrho,
     xlab="Quantile",ylab="% of True Parameter Value",main=paste("RMSE of rho, ",main.str.f,sep=""));
abline(h=0,lty=2);

plot(seq(q.lower,q.upper,by.thres),sesd.rho.f[q.cut],lty=1,type="l",xlab="Quantile", ylim=ylimsesd,
     ylab="% of True Parameter Value",main=paste("SE/SD of rho, ",main.str.f,sep=""));
abline(h=1,lty=2); 



dev.off();
